/*==============================================================================
Canada - IC - GDP and Industry 

Outline:
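This do-file:

I.)   imports Canada provincial GDP data (1970),

II.)  imports Canada industry-detail data (1971) and redistributes reported
      discrepancies over missing industry cells,

III.) merges both into the 1971 census initial-conditions file,

IV.)  modifies region codes.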

==============================================================================*/

clear all
set more off

* All raw Excel inputs below are read relative to this directory.
cd "$insheet_files/Canada"

*===============================================================================
* GDP (1970)
*===============================================================================

import excel "CA_GDP_1970.xls", cellrange(A5:C17) firstrow

* -tempfile- stores the temporary path in the local macro CA_GDP, so the file
* must be referenced as `CA_GDP' (no ".dta" inside the macro quotes).
tempfile CA_GDP
save "`CA_GDP'"

*===============================================================================
* Industry (1971)
*===============================================================================

import excel "CA_IND_1971.xls", firstrow clear

* Rows without a province code are dropped.
drop if provca == .

keep provca region_name forestry mining disc_mnfg food_and_beverage tobacco ///
	rubber leather textiles knitting clothing wood furniture paper printing ///
	primary_metal metal_fabricating machinery transport_equip electrical_products ///
	non_metallic_mineral petroleum_and_coal chemicals miscellaneous construction ///
	disc_transportation transportation storage communication electric_power ///
	trade_total fire_total disc_services recreation business_services laundries_etc ///
	hotels_restaurant misc_services total

* Variable groups, defined once and reused below.
* share_vars uses varlist ranges, so it relies on the column order of the
* imported sheet (NOTE(review): confirm the disc_* columns sit outside the ranges).
local share_vars     forestry mining food_and_beverage-construction ///
	transportation-fire_total recreation-misc_services
local mnfg_vars      food_and_beverage tobacco rubber leather textiles knitting ///
	clothing wood furniture paper printing primary_metal metal_fabricating ///
	machinery transport_equip electrical_products non_metallic_mineral ///
	petroleum_and_coal chemicals miscellaneous
local transport_vars transportation storage communication electric_power
local services_vars  recreation business_services laundries_etc ///
	hotels_restaurant misc_services

* Redistribute the discrepancy columns (disc_*) over missing industry values,
* in proportion to national Canadian industry shares.

* National total over the detailed industries (non-missing on the Canada row only).
egen total_ca = rowtotal(`share_vars') if region_name == "Canada"

foreach var of varlist `share_vars' {
	* Zeros are treated as missing cells to be filled by the redistribution.
	replace `var' = . if `var' == 0

	* The share is computed on the Canada row only; min() then copies that
	* single non-missing value to every observation.
	tempvar canada_share
	gen `canada_share' = `var' / total_ca if region_name == "Canada"
	egen ntl_share_`var' = min(`canada_share')
	drop `canada_share'
}

* Within each group, a missing cell receives
*   disc_<group> * (its national share / sum of national shares of the
*                   cells missing in that row).

* manufacturing
foreach var of varlist `mnfg_vars' {
	gen mnfg_den_`var' = ntl_share_`var' if `var' == .
}
egen mnfg_fix_denominator = rowtotal(mnfg_den_*)

foreach var of varlist `mnfg_vars' {
	replace `var' = (ntl_share_`var' / mnfg_fix_denominator) * disc_mnfg if `var' == .
}

* transportation
foreach var of varlist `transport_vars' {
	gen transport_den_`var' = ntl_share_`var' if `var' == .
}
egen transport_fix_denominator = rowtotal(transport_den_*)

foreach var of varlist `transport_vars' {
	replace `var' = (ntl_share_`var' / transport_fix_denominator) * disc_transportation if `var' == .
}

* services
foreach var of varlist `services_vars' {
	gen services_den_`var' = ntl_share_`var' if `var' == .
}
egen services_fix_denominator = rowtotal(services_den_*)

foreach var of varlist `services_vars' {
	replace `var' = (ntl_share_`var' / services_fix_denominator) * disc_services if `var' == .
}

* Aggregate the detailed industries into the ind_* / serv_* sector variables.
egen ind_energy       = rowtotal(petroleum_and_coal electric_power)
egen ind_mining       = rowtotal(mining non_metallic_mineral chemicals)
gen  ind_construction = construction
egen ind_metals       = rowtotal(primary_metal-electrical_products)
egen ind_other        = rowtotal(food_and_beverage tobacco rubber leather textiles ///
	knitting clothing wood furniture paper printing miscellaneous)

* ind_energy-ind_other spans the five variables created just above.
egen ind_total = rowtotal(ind_energy-ind_other)

gen ind_mnfg = ind_metals + ind_other

egen serv_commerce  = rowtotal(trade_total hotels_restaurant)
egen serv_credit    = rowtotal(fire_total business_services)
egen serv_transport = rowtotal(transportation storage communication)
egen serv_other     = rowtotal(recreation laundries_etc misc_services)

* `total' is kept because it is collapsed here and used as `subtotal' after the merge.
keep provca ind_* serv_* total
collapse (sum) ind* serv* total, by(provca)

* Replace the Canada row (provca == 0) with the sum over the provinces, so the
* national figures include the redistributed discrepancies.
foreach var of varlist ind_* serv_* {
	quietly summarize `var' if provca != 0
	replace `var' = r(sum) if provca == 0
}

tempfile ca_industry_detail
save "`ca_industry_detail'"

*===============================================================================
* Merge with Census
*===============================================================================

* I.) load census initial conditions for Canada

use nuts provca region ind* serv* using "$dta_files/IC_CA_CENS1971", clear
	* The row with a missing province code is recoded to 0, the code the
	* industry file uses for Canada.
	replace provca = 0 if provca == .

* II.) merge in Canada provincial GDP data

merge 1:1 provca using "`CA_GDP'", gen(_merge_GDP)

* III.) merge in Canada industry-detail data

egen total = rowtotal(ind_agro ind_total serv_total)

* Record the census shares of agriculture and administration so that they can
* be reproduced after the sector values are replaced below.
foreach var of varlist ind_agro serv_admin {
	gen share_`var' = `var' / total
}

* Blank out the census sector values; -merge, update- then fills these missing
* cells from the industry-detail file.
foreach var of varlist ind_energy-ind_total serv_commerce serv_credit serv_transport serv_other serv_total {
	replace `var' = .
}

drop total
merge 1:1 provca using "`ca_industry_detail'", update gen(_merge_ca_industry)

* `total' now comes from the industry-detail file.
rename total subtotal

* Rescale agriculture and administration so that their shares of the new
* overall total equal the census shares. With a = share_ind_agro and
* s = share_serv_admin, the expressions below reduce algebraically to
*   serv_admin = s*subtotal/(1 - a - s),  ind_agro = a*subtotal/(1 - a - s).
replace serv_admin = (share_serv_admin*subtotal + share_serv_admin*share_ind_agro*subtotal / (1 - share_ind_agro)) / (1 - share_serv_admin - share_ind_agro*share_serv_admin/(1 - share_ind_agro))
replace ind_agro = (share_ind_agro*subtotal + share_serv_admin*share_ind_agro*subtotal / (1 - share_serv_admin)) / (1 - share_ind_agro - share_ind_agro*share_serv_admin/(1 - share_serv_admin))

drop subtotal share*

* Rebuild the services total from all remaining serv* components.
drop serv_total
egen serv_total = rowtotal(serv*)

* IV.) Modify codes

replace region = 124001 if provca == 2
replace region = 124002 if provca == 3
replace region = 124003 if provca == 4
replace region = 124004 if provca == 5
replace region = 124005 if provca == 6
replace region = 124006 if provca == 7
replace region = 124007 if provca == 8
replace region = 124008 if provca == 9
replace region = 124009 if provca == 10
replace region = 124010 if provca == 1

label val region region_wvs_labels

drop provca _merge*

* NOTE(review): region_name is not loaded from the census file and is not in
* the collapsed industry file, so it presumably comes from the GDP sheet - confirm.
drop if region_name == "Yukon"

save "$dta_files/IC_CA_GDP_IND.dta", replace
